library(tidyverse)
source('R/report_plots.R')

## functions ------

# creates yyyy-ww label for grouping data
get_date_week <- function(x){
  y <- lubridate::year(x)
  w <- lubridate::week(x) |> str_pad(2, 'left', 0)
  str_glue("{y}-{w}")
}

# get biweekly bin date for vector of dates
get_date_biweekly <- function(x){
  day_of_week <-  lubridate::wday(x)
  case_when(
    day_of_week %in% 1:3 ~ x + 3 - day_of_week,
    TRUE ~ x + 5 - day_of_week,
  )
}

# lookup table for date
week_midpoint_date_lookup <- function(start = '2021-01-01', end = '2023-01-01'){
  ends = lubridate::as_date(c(start, end))
  tibble(
    date = seq(ends[1], ends[2], by = 1),
    date_week = lubridate::week(date) |> str_pad(2, 'left', '0'),
    date_year = lubridate::year(date),
    week = str_glue("{date_year}-{date_week}"),
  ) |> 
    group_by(week) |> 
    summarise(date = mean(date))
}


binom_ci <- function(x, n){
  Hmisc::binconf(x, n, method = 'wilson', alpha = 0.05) |> 
    as_tibble() |> 
    janitor::clean_names()
}

get_binom_ci <- function(data){
  data |> 
    summarise(
      x = sum(pcr_positive == 'Positive'),
      n = n(),
      binconf = map2(x, n, ~binom_ci(.x, .y))
    ) |> 
    unnest(binconf) |> 
    mutate(across(c(point_est, lower, upper), ~(.x*100) |> round(1)))
}


## data ----------

swabs <- 
  read_rds(here('data/cube.rds')) |> 
  filter(str_detect(site, '^UO_')) 

# data from university of ottawa
wifi <- 
  read_rds(here('data/uo_wifi.rds')) |> 
  filter(date >= min(swabs$date_swab), 
         date <= max(swabs$date_swab))

# lookup table for waste water site names
lookup_ww <- tribble(
  ~site_abbr, ~site,
  'UO_AA',  'Annex Residence',
  'UO_FA',  'Faculty of Social Sciences',
  'UO_FT',  'Friel Residence',
  'UO_NA',  'Northern sampling site',
  'UO_RGN', 'Roger Guindon Hall',
  'UO_ST',  'Southern sampling site',
  'UO_TBT', 'Tabaret Hall'
)

# UOttawa waste water data from R. Delatolla
uottawa_ww <-
  readxl::read_excel(here::here('data/ww_university.xlsx')) |>
  janitor::clean_names() |> 
  mutate(sample_date = as_date(sample_date)) |> 
  filter(
    sample_date > '2021-09-15', 
    sample_date < '2022-05-05',
    !is.na(viral_copies_per_copies_pep_avg),
    site != 'UO_RGN'
    ) |> 
  left_join(lookup_ww, by = c('site' = 'site_abbr')) |> 
  mutate(
    source =
      source_name |>
      str_remove('^Data - uOttawa - ') |>
      str_remove('.xlsx$')
  ) |> 
  transmute(site,
            sample_date, 
            signal = viral_copies_per_copies_pep_avg)

# ottawa wastewater data: daily means
regional_ww <- 
  read_rds(here('data/ww_ottawa.rds')) |> 
  select(sample_date, starts_with('cov_')) |> 
  pivot_longer(contains('cov_')) |> 
  mutate(target = str_extract(name, 'cov_n.'),
         stat = str_extract(name, 'mean|sd'),
         ) |> 
  select(-name) |> 
  pivot_wider(names_from = stat, values_from = value) |> 
  mutate(.lower = mean - sd, .upper = mean + sd)   

# important dates for context
event_dates <- 
  tribble(
    ~event, ~start, ~end,
    'Reading Week',  '2021-10-24', '2021-10-30',
    'Holiday Break', '2021-12-22', '2022-01-04',
    'Closure',       '2022-01-04', '2022-01-31',
    'Closure',       '2022-02-16', '2022-02-21',
    'Reading Week',  '2022-02-20', '2022-02-26',
  ) |> 
  mutate(across(start:end, as_date))

# UOttawa cases data
cases <-
  read_rds(here('data/cases_rule_imputed.rds')) |> 
  select(case, role, case_date, UC:TBT) |>
  mutate(biweek = get_date_biweekly(case_date)) |> 
  nest(associated_sites = UC:TBT)

📊 Summary

This plot shows the counts of positive (red) and negative (yellow) samples collected at each facility over time (x-axis). Samples that could not be tested are shown in navy. Only flocked swabs are shown. (Other sponge swabs were collected on 2022-04-28 were for comparing flocked and sponge swabs.)

plot_timeseries_summary(
  swabs |>
    filter(
      swab_type %in% 'flock',
      !negative_control,
      !is.na(negative_control)
    ),
  height_svg = 3.3,
  width_svg = 8
  )

⡯⡷ Dotplot

This plot shows the counts of positive and negative samples collected at each facility over time.

plot_timeseries_locations(
  swabs |>
    filter(swab_type == 'flock'),
  height_svg = 6,
  width_svg = 8
  )

Model

This section contains results from modeling SARS-CoV-2 cases at UO using swab-PCR results as a predictor.


Specification

We created a random intercepts logistic regression model with the occurrence of cases (binary) as outcome and swab results for the previous week (the proportion of positive swabs) as predictor. The model has a random intercept for each site.

Our model formula is cases ~ positives[lag 1 week] + (1|site).

The model is fit as follows:

swab_model <-
  blme::bglmer(
    cases_binary ~ detection_lag_1week + (1 | site),
    data =  uo_sites,
    family = binomial
  )

swab_model <- read_rds(here('model/uo_swab_model.rds'))


Model response time-series

These plots show the swab results, cases, and predictions by the current model.

# uo cases data - long table
cases_long <-
  read_rds(here('./data/latest_uo_cases.rds')) |>
  filter(!is.na(positive_result),
         positive_result > lubridate::as_date('2021-09-14')) |>
  select(case, role, confirmed_by_test, positive_result, c(`90U`:TBT)) |>
  mutate(
    `Campus Total` = TRUE,
    # 3 days transmissibility
    transmission_start = positive_result - days(3)
    ) |>
  pivot_longer(`90U`:`Campus Total`, names_to = 'site') |>
  filter(value) |>
  select(-value) |>
  mutate(site = fct_relevel(site, "Campus Total", after = 0L))

uo_pred <- read_rds(here('model/uo_pred.rds'))

# create a 3-panel fig for each of 6 buildings.
map(
  # site names for filtering
  .x = swabs$site |> unique() |> str_remove('UO_'),
  .f = ~plot_uo_pred_swab_case(
    swabs = swabs,
    cases = cases_long,
    preds = uo_pred,
    site_select = .x
  )
) |> wrap_plots(guides = 'collect', ncol = 1)



Model summary

These tables show the model coefficients and statistics.

broom.mixed::glance(swab_model) |>
  mutate_if(is.numeric, round, 2) |>
  kableExtra::kable(caption = 'Model statistics',
                    format = "html") |>
  kableExtra::kable_styling(bootstrap_options = "striped",
                            full_width = F, position = "left")
Model statistics
nobs sigma logLik AIC BIC deviance df.residual
84 1 -24.52 55.04 62.33 42.83 81

broom.mixed::tidy(swab_model) |>
  mutate(p.value = as.character(p.value)) |>
  mutate_if(is.numeric, round, 3) |>
  mutate(p.value = as.numeric(p.value)) |>
  kableExtra::kable(caption = 'Model parameters',
                    format = "html") |>
  kableExtra::kable_styling(bootstrap_options = "striped",
                            full_width = F, position = "left")
Model parameters
effect group term estimate std.error statistic p.value
fixed NA (Intercept) -3.301 0.804 -4.107 0.0000401
fixed NA detection_lag1w 7.114 2.112 3.369 0.0007549
ran_pars site sd__(Intercept) 1.067 NA NA NA
# tidy(swab_model, effects = 'ran_vals')



sjPlot::tab_model(
  swab_model,
  show.re.var = TRUE,
  pred.labels = c("(Intercept)", "Swabs @ t-1week"),
  dv.labels = "Relation between UO cases (y/n) and proportion\n of positive swabs the previous week"
)
  Relation between UO cases (y/n) and proportion of positive swabs the previous week
Predictors Odds Ratios CI p
(Intercept) 0.04 0.01 – 0.18 <0.001
Swabs @ t-1week 1229.31 19.59 – 77128.28 0.001
Random Effects
σ2 3.29
τ00 site 1.14
ICC 0.26
N site 6
Observations 84
Marginal R2 / Conditional R2 0.378 / 0.538



Model response curves

This plot shows the modelling data as points (detection lagged 1 week and cases at a given site- y/n), as well as how the probability of future cases varies by the previous weeks detection level and site, according to our model (curves).

read_rds(here('fig/plt_model_resp_curves.rds')) +
  labs(subtitle = 'Points show case/swab data and curves show model probability values')



Random Intercepts for Sites

This plot shows the odds ratios for the intercepts of the model (an intercept for each site).

# random effect plot: site intercepts; axes are flipped
sjPlot::plot_model(swab_model, type = 're') +
  theme_light() +
  labs(title = "Random effects", y = 'Odds Ratio')



EDA

Swabs and Cases

This panel shows the cases counts at UO over the course of our sampling period. The case data shown represents the days on which a positive test was reported (black rug lines) and the presumed start of transmissiblity for each case (red lines).

# generated from file R/plot_UO_case_load.R
read_rds(here('fig/plt_cases_swabs.rds'))



Swabs, Wifi, and CO2

This panel shows linked data from swab results, CO2 readings, and wifi traffic (number of users @ peak, daily) during our study period. Unfortunately, we do not have wifi data for 90U.

set.seed(1234)
uo_multivariate <- make_uo_panel(swabs, wifi)
write_rds(uo_multivariate, here('fig/uo_multivariate.rds'))

uo_multivariate & theme(text = element_text(size = 13))

rm(uo_multivariate)



Wifi Traffic

plot_UO_wifi_ts(wifi, swabs)

This panel shows a time-series of the daily peak number of wifi users at UO facilities. Sampling days are highlighted in blue.


Waste Water - Regional

regional_ww |>
  group_by(sample_date) |>
  summarise(mean = mean(mean)) |>
  ggplot() +
  geom_rect(data = tibble(xmin = min(swabs$date_swab),
                   xmax = max(swabs$date_swab),
                   ymin = -Inf,
                   ymax = Inf),
            aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
            fill = 'grey90', color = 'gray',
            size = 0.5,
            lty = 2, alpha = 0.5) +
  geom_text(aes(as_date('2022-01-01'), 0.0035, label = 'UOttawa study'),
            color = 'darkgray') +
  geom_point(aes(sample_date, mean),
             alpha = 0.76, size = 0.85, shape = 1) +
  geom_smooth(aes(sample_date, mean),
              method = 'loess', formula = 'y ~ x',
              span = 0.18, size = 0.75, alpha = 0.25,
              fill = 'blue') +
  scale_shape(solid = F) +
  labs(x = 'Date',
       y = 'Mean SARS-CoV-2 / PPMoV ',
       color = 'PCR Target',
       fill = 'PCR Target',
       shape = 'PCR Target',
       subtitle = 'Ottawa Waste-water: Combined N1 and N2 targets')



Time-series comparison

2022-02-28 Update: This data uses weekly aggregates, the new results use biweekly aggregates (in case you are wondering why the r values aren’t the same).

We probably want to summarise our swabs at the weekly (or biweekly?) level for comparison to the waste-water signal. This is what the weekly campus-wide time-series looks like compared with our weekly campus-wide swab positivity:

ww_weekly_summary <-
  uottawa_ww |>
  mutate(week = get_date_week(sample_date)) |>
  left_join(week_midpoint_date_lookup('2021-01-01', '2022-12-31'),
            by = 'week') |>
  select(date, signal, site) |>
  group_by(date) |>
  summarise(signal_mean = mean(signal),
            signal_sd = sd(signal))

plt_ww_weekly <- ww_weekly_summary |>
  ggplot(aes(date,
             signal_mean,
             # ymin = signal_mean - signal_sd,
             # ymax = signal_sd + signal_sd
             )) +
  geom_line() +
  geom_point() +
  geom_point(data = uottawa_ww,
             aes(as_date(sample_date), signal),
             size = 1, shape = 1, alpha = 0.5) +
  labs(x = 'Date', y = 'SARS-CoV-2 / PMMoV',
       title = 'Weekly campus-wide average SARS-CoV-2 waste-water detection')

swabs_weekly_summary <- swabs |>
  mutate(week = get_date_week(date_swab)) |>
  left_join(week_midpoint_date_lookup('2021-01-01', '2022-12-31'),
            by = 'week') |>
  group_by(date) |>
  summarise(
    x = sum(pcr_positive == 'Positive'),
    n = n(),
    positivity = list(binom_ci(x, n))
    ) |>
  unnest_wider(positivity)

plt_swabs_weekly <- swabs_weekly_summary |>
  ggplot(aes(x = date, y = point_est*100)) +
  geom_line() +
  geom_point() +
  scale_x_date() +
  scale_y_continuous(limits = c(0,100)) +
  labs(x = 'Date', y = 'Swab positivity (%)',
       title = 'Weekly campus-wide surface swab detection rate')

wrap_plots(plt_swabs_weekly, plt_ww_weekly, ncol = 1)

If we look at the relationship between swabs and waste-water on weeks where both results are available, we see that they have a positive correlation.

campus_ww_swab_summary <-
  swabs_weekly_summary |>
  select(date, point_est) |>
  left_join(ww_weekly_summary, by = 'date') |>
  transmute(date, ww = signal_mean, swabs = point_est*100) |> 
  filter(!is.na(ww))

campus_ww_swab_summary |>
  ggplot(aes(swabs, ww)) +
  geom_smooth(method = 'lm', formula = 'y ~ x') +
  geom_point() +
  coord_fixed(ratio = 18000) +
  labs(x = 'Swab positivity (%)',
       y = 'Waste-water signal',
       color = "Sample\nWeek")

cortest_swab_ww <-
  lst(
    pearson = cor.test(
      campus_ww_swab_summary$ww,
      campus_ww_swab_summary$swabs,
      method = 'pearson'
    ),
    pearson_est = pearson$estimate |> round(2),
    pearson_lower = pearson$conf.int[1] |> round(2),
    pearson_upper = pearson$conf.int[2] |> round(2),

    spearman = cor.test(
      campus_ww_swab_summary$ww,
      campus_ww_swab_summary$swabs,
      method = 'spearman'
    ),
    spearman_est = spearman$estimate |> round(3),
    # spearman_lower = spearman$conf.int[1] |> round(2),
    # spearman_upper = spearman$conf.int[2] |> round(2),

    kendall = cor.test(
      campus_ww_swab_summary$ww,
      campus_ww_swab_summary$swabs,
      method = 'kendall'
  ),
    kendall_est = kendall$estimate |> round(2),
    # kendall_lower = kendall$conf.int[1] |> round(2),
    # kendall_upper = kendall$conf.int[2] |> round(2),

) |>
  suppressWarnings() |>
  glue_data(
    "The Pearson's *r* (linear correlation) is {pearson_est} ({pearson_lower}-{pearson_upper}).
    The Spearman's *r* (rank correlation) is {spearman_est}..
    The Kendall's *Tau*  is {kendall_est}."
  )

Pearson’s r is more affected by outliers. Spearman’s r or Kendall’s Tau are probably the more reliable measures of correlation between these variables.



(Data doesn’t appear normally distributed, btw)

campus_ww_swab_summary |>
  pivot_longer(cols = c(ww, swabs)) |>
  ggplot(aes(value)) +
  geom_histogram(bins = 15, color = 'lightgray') +
  facet_wrap(~name, scales = 'free')




Waste Water - UO

I’ve removed the off-campus site (Robert Guidon Hall) and retained the other 6 near-campus sites (raw data below).

uottawa_ww |>
  ggplot(aes(sample_date, signal)) +
  geom_path(alpha = 0.6) +
  geom_point(alpha = 0.5, size = 0.5) +
  labs(x = 'Date', y = 'SARS-CoV-2 / PPMoV') +
  facet_wrap(~site, ncol = 2)